##Loading required modules
pacman::p_load(sf, funModeling,maptools,raster, spatstat, tmap , tidyverse,sfdep , knitr) Above we have imported all the relevant modules and now we will be importing all the relevant Data.
Here we are importing the Nigeria adm data and removing any duplicates and filtering our data to only include our area of study, Osun state.
NGA <- st_read("./data/geospatial/",
layer = "nga_admbnda_adm2_osgof_20190417") %>%
st_transform(crs = 26392)Reading layer `nga_admbnda_adm2_osgof_20190417' from data source
`/Users/shaythuram/Desktop/Geospatial Analytics and Applications/Content/QUARTO SITE/Take Home Assignment/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 774 features and 16 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2.668534 ymin: 4.273007 xmax: 14.67882 ymax: 13.89442
Geodetic CRS: WGS 84
NGA <- NGA %>%dplyr::select(c(3:4, 8:9))
NGA$ADM2_EN[duplicated(NGA$ADM2_EN)==TRUE][1] "Bassa" "Ifelodun" "Irepodun" "Nasarawa" "Obi" "Surulere"
NGA$ADM2_EN[94] <- "Bassa, Kogi"
NGA$ADM2_EN[95] <- "Bassa, Plateau"
NGA$ADM2_EN[304] <- "Ifelodun, Kwara"
NGA$ADM2_EN[305] <- "Ifelodun, Osun"
NGA$ADM2_EN[355] <- "Irepodun, Kwara"
NGA$ADM2_EN[356] <- "Irepodun, Osun"
NGA$ADM2_EN[519] <- "Nasarawa, Kano"
NGA$ADM2_EN[520] <- "Nasarawa, Nasarawa"
NGA$ADM2_EN[546] <- "Obi, Benue"
NGA$ADM2_EN[547] <- "Obi, Nasarawa"
NGA$ADM2_EN[693] <- "Surulere, Lagos"
NGA$ADM2_EN[694] <- "Surulere, Oyo"
NGA$ADM2_EN[duplicated(NGA$ADM2_EN)==TRUE]character(0)
NGA<-NGA %>%filter(`ADM1_EN` == "Osun")Now we are going to import our Water Point Data and filter it to only include Osun state and applying the right projection system to our data.
wp_nga <- read_csv("./data/aspatial/WPdx.csv") %>%
filter(`#clean_country_name` == "Nigeria")Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 426774 Columns: 74
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (46): #source, #report_date, #status_id, #water_source_clean, #water_sou...
dbl (23): row_id, #lat_deg, #lon_deg, #install_year, #fecal_coliform_value, ...
lgl (5): #rehab_year, #rehabilitator, is_urban, latest_record, is_duplicate
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wp_nga <- wp_nga %>%filter(`#clean_adm1` == "Osun")
wp_nga$Geometry = st_as_sfc(wp_nga$`New Georeferenced Column`)
wp_sf <- st_sf(wp_nga, crs=4326)
wp_sf <- wp_sf %>%st_transform(crs = 26392)Now we remove our “Unknown” values in the #status_clean column and create two sf’s called Functional and Non-Functional based on the filterings below.
wp_sf_nga <- wp_sf %>%
rename(status_clean = '#status_clean') %>%
select(status_clean) %>%
mutate(status_clean = replace_na(
status_clean, "unknown"))
wp_functional <- wp_sf_nga %>%
filter(status_clean %in%
c("Functional",
"Functional, needs repair",
"Functional, not in use",
"Functional but not in use"))
wp_nonfunctional <- wp_sf_nga %>%
filter(status_clean %in%
c("Abandoned/Decommissioned",
"Non-Functional, dry",
"Non-Functional"))The code chunk below uses as_Spatial() of sf package to convert the three geospatial data from simple feature data frame to sp’s Spatial* class.
NGA_spatial <- as_Spatial(NGA)
wp_spatial_functional <- as_Spatial(wp_functional)
wp_spatial_non_functional <- as_Spatial(wp_nonfunctional)spatstat requires the analytical data in ppp object form. There is no direct way to convert a Spatial* classes into ppp object. We need to convert the Spatial classes* into Spatial object first.
The codes chunk below converts the Spatial* classes into generic sp objects.
wp_sp_functional <- as(wp_spatial_functional, "SpatialPoints")
wp_sp_non_functional <- as(wp_spatial_non_functional, "SpatialPoints")
NGA_sp <- as(NGA_spatial, "SpatialPolygons")Now, we will use as.ppp() function of spatstat to convert the spatial data into spatstat’s ppp object format.
wp_functional_ppp <- as(wp_sp_functional, "ppp")
wp_non_functional_ppp <- as(wp_sp_non_functional, "ppp")Now we check for duplicates
sum(multiplicity(wp_functional_ppp) > 1)[1] 0
sum(multiplicity(wp_non_functional_ppp) > 1)[1] 0
When analysing spatial point patterns, it is a good practice to confine the analysis with a geographical area like Singapore boundary. In spatstat, an object called owin is specially designed to represent this polygonal region. so let’s convert NGA into the Owin format’
NGA_owin <- as(NGA_sp, "owin")
plot(NGA_owin)
In this last step of geospatial data wrangling, we will extract childcare events that are located within Singapore by using the code chunk below.
wp_functional_NGA_ppp = wp_functional_ppp[NGA_owin]
wp_non_functional_NGA_ppp = wp_non_functional_ppp[NGA_owin]In the code chunk below, rescale() is used to covert the unit of measurement from meter to kilometer.
wp_functional_NGA_ppp <- rescale(wp_functional_NGA_ppp, 1000, "km")
wp_non_functional_NGA_ppp <- rescale(wp_non_functional_NGA_ppp, 1000, "km")Now, we can run density() using the resale data set and plot the output kde map
Here is the KDE map for Functional waterpoints
kde_functional_wpNGA_bw <- density(wp_functional_NGA_ppp,
sigma=bw.diggle,
edge=TRUE,
kernel="gaussian")
plot(kde_functional_wpNGA_bw)
Here is the KDE map for Non-Functional Waterpoints.
kde_non_functional_wpNGA_bw <- density(wp_non_functional_NGA_ppp, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
plot(kde_non_functional_wpNGA_bw)
gridded_kde_non_functional_wpNGA.bw<- as.SpatialGridDataFrame.im(kde_non_functional_wpNGA_bw)Now we convert each of the above KDE outputs into grid objects
For Functional Waterpoints :
As shown below we observe spots of highly density areas with Non Functional waterpoints meaning that the distribution of Non Functional Waterpoints in Osun are not randomly distributed.
gridded_kde_functional_wpNGA.bw<- as.SpatialGridDataFrame.im(kde_functional_wpNGA_bw)
plot(gridded_kde_functional_wpNGA.bw)
For Non Functional Waterpoints :
As shown below we observe spots of highly density areas with Non Functional waterpoints meaning that the distribution of Non Functional Waterpoints in Osun are not randomly distributed.
gridded_kde_non_functional_wpNGA.bw<- as.SpatialGridDataFrame.im(kde_non_functional_wpNGA_bw)
plot(gridded_kde_non_functional_wpNGA.bw)
Next, we will convert the gridded kernal density objects into RasterLayer object by using raster() of raster package. Note that we need to re-declare the CRS information as converting our data to OWIN format removed this information.
Only then can we display the raster in cartographic quality map using tmap package.
For Non Functional Waterpoints :
gridded_kde_non_functional_wpNGA.bw_raster <- raster(gridded_kde_non_functional_wpNGA.bw)
projection(gridded_kde_non_functional_wpNGA.bw_raster) <- CRS("+init=EPSG:26392 +datum=WGS84 +units=km")
gridded_kde_non_functional_wpNGA.bw_rasterclass : RasterLayer
dimensions : 128, 128, 16384 (nrow, ncol, ncell)
resolution : 0.8948485, 0.9616045 (x, y)
extent : 176.5032, 291.0438, 331.4347, 454.5201 (xmin, xmax, ymin, ymax)
crs : +init=EPSG:26392 +datum=WGS84 +units=km
source : memory
names : v
values : -4.551756e-15, 20.49412 (min, max)
tmap_mode('view') tmap mode set to interactive viewing
tm_basemap("OpenStreetMap") tm_shape(gridded_kde_non_functional_wpNGA.bw_raster) +
tm_raster("v") +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
Variable(s) "v" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
For Functional Waterpoints :
gridded_kde_functional_wpNGA.bw_raster <- raster(gridded_kde_functional_wpNGA.bw)
projection(gridded_kde_functional_wpNGA.bw_raster) <- CRS("+init=EPSG:26392 +datum=WGS84 +units=km")
gridded_kde_functional_wpNGA.bw_rasterclass : RasterLayer
dimensions : 128, 128, 16384 (nrow, ncol, ncell)
resolution : 0.8948485, 0.9616045 (x, y)
extent : 176.5032, 291.0438, 331.4347, 454.5201 (xmin, xmax, ymin, ymax)
crs : +init=EPSG:26392 +datum=WGS84 +units=km
source : memory
names : v
values : -4.599238e-15, 24.34507 (min, max)
tmap_mode('view') tmap mode set to interactive viewing
tm_basemap("OpenStreetMap") tm_shape(gridded_kde_functional_wpNGA.bw_raster) +
tm_raster("v") +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
Variable(s) "v" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Part 1 DoneNow we are going to compute L-function estimation by using Lest() of spatstat package. W will also how to perform monta carlo simulation test using envelope() of spatstat package.
L_osun_functional = Lest(wp_functional_NGA_ppp, correctionc = "Ripley")
plot(L_osun_functional, . -r ~ r,
ylab= "L(d)-r", xlab = "d(km)")
To confirm the observed spatial patterns above, a hypothesis test will be conducted. The hypothesis and test are as follows:
Ho = The distribution of Functional Waterpoints in Osun are randomly distributed.
H1= The distribution of Functional Waterpoints in Osun are not randomly distributed.
The null hypothesis will be rejected if p-value if smaller than alpha value of 0.05. Confidence interval is therefore 95%
The code chunk below is used to perform the hypothesis testing.
L_osun_functional.csr <- envelope(wp_functional_NGA_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)Generating 39 simulations of CSR ...
1, 2, [etd 1:22:19] 3, [etd 1:24:48] 4,
[etd 1:22:18] 5, [etd 1:19:36] 6, [etd 1:17:10] 7, [etd 1:15:01] 8,
[etd 1:12:31] 9, [etd 1:10:13] 10, [etd 1:07:59] 11, [etd 1:05:33] 12,
[etd 1:02:40] 13, [etd 1:00:45] 14, [etd 58:25] 15, [etd 56:00] 16,
[etd 53:53] 17, [etd 51:35] 18, [etd 49:23] 19, [etd 47:01] 20,
[etd 44:46] 21, [etd 42:29] 22, [etd 40:09] 23, [etd 37:46] 24,
[etd 35:22] 25, [etd 33:02] 26, [etd 30:43] 27, [etd 28:21] 28,
[etd 25:57] 29, [etd 23:34] 30, [etd 21:13] 31, [etd 18:50] 32,
[etd 16:28] 33, [etd 14:09] 34, [etd 11:48] 35, [etd 9:24] 36,
[etd 7:03] 37, [etd 4:43] 38, [etd 2:22] 39.
Done.
plot(L_osun_functional.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
Based on the output of CSR as shown above, the curve is above envelope as such we have enough statisctial evidence to reject the null hypothesis at 95% confidence. Therefore we can say that that the distribution of Functional waterpoints are not randomly distributed with 95% confidence.
L_osun_non_functional = Lest(wp_non_functional_NGA_ppp, correctionc = "Ripley")
plot(L_osun_non_functional, . -r ~ r, ylab= "L(d)-r", xlab = "d(km)")
To confirm the observed spatial patterns above, a hypothesis test will be conducted. The hypothesis and test are as follows:
Ho = The distribution of Non Functional Waterpoints in Osun are randomly distributed.
H1= The distribution of Non Functional Waterpoints in Osun are not randomly distributed.
The null hypothesis will be rejected if p-value if smaller than alpha value of 0.05. Confidence interval is therefore 95%
The code chunk below is used to perform the hypothesis testing.
L_osun_non_functional.csr <- envelope(wp_non_functional_NGA_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE)Generating 39 simulations of CSR ...
1, 2, [etd 59:34] 3, [etd 58:46] 4,
[etd 56:54] 5, [etd 53:58] 6, [etd 52:31] 7, [etd 50:27] 8,
[etd 49:57] 9, [etd 48:12] 10, [etd 46:21] 11, [etd 44:51] 12,
[etd 43:34] 13, [etd 41:53] 14, [etd 40:07] 15, [etd 38:11] 16,
[etd 36:26] 17, [etd 34:30] 18, [etd 33:00] 19, [etd 31:19] 20,
[etd 29:41] 21, [etd 28:00] 22, [etd 26:27] 23, [etd 24:57] 24,
[etd 23:29] 25, [etd 21:55] 26, [etd 20:24] 27, [etd 18:47] 28,
[etd 17:13] 29, [etd 15:39] 30, [etd 14:28] 31, [etd 12:49] 32,
[etd 11:11] 33, [etd 9:35] 34, [etd 7:59] 35, [etd 6:24] 36,
[etd 4:47] 37, [etd 3:11] 38, [etd 1:35] 39.
Done.
plot(L_osun_non_functional.csr, . - r ~ r, xlab="d", ylab="L(d)-r")
Based on the output of CSR as shown above, the curve is above envelope as such we have enough statisctial evidence to reject the null hypothesis at 95% confidence. Therefore we can say that that the distribution of Non Functional waterpoints are not randomly distributed with 95% confidence.
Advantages of kernel density maps over point maps are as follows:
Smoothing: Kernel density maps can smooth out spatial patterns in the data and reveal underlying trends that might not be apparent in a point map. This can be especially useful when the data is dense or there is a lot of variability in the data.
Area representation: Kernel density maps represent the intensity of data over an area rather than just at specific locations. This makes it easier to see patterns in the data and to make generalizations about the distribution of the data.
Handling of sparse data: Kernel density maps can be useful for visualizing sparse data that is spread out over a large area. In a point map, the sparse data might not be noticeable, whereas in a kernel density map, it can be easily seen as a low intensity area.
Improved readability: Kernel density maps can be easier to read and interpret than point maps, especially when the data is dense or there are many overlapping points. In a kernel density map, the data is represented as a continuous surface, which can make it easier to see patterns and relationships in the data.
For part 3 we are going to reload our wp data as it will require different kind of manipulation from the start. We also need some different modules
pacman::p_load(sf, funModeling,maptools,raster, spatstat, tmap , tidyverse,sfdep) wp_nga <- read_csv("./data/aspatial/WPdx.csv") %>%
filter(`#clean_country_name` == "Nigeria")Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 426774 Columns: 74
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (46): #source, #report_date, #status_id, #water_source_clean, #water_sou...
dbl (23): row_id, #lat_deg, #lon_deg, #install_year, #fecal_coliform_value, ...
lgl (5): #rehab_year, #rehabilitator, is_urban, latest_record, is_duplicate
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wp_nga <- wp_nga %>%filter(`#clean_adm1` == "Osun")
wp_nga$Geometry = st_as_sfc(wp_nga$`New Georeferenced Column`)wp_sf <- st_sf(wp_nga, crs=4326)
wp_sf <- wp_sf %>%st_transform(crs = 26392)We are going to ensure that we only have two types of variables in #status_clean. We will drop those that are not labelled (NA) and then rename the rest to functional and non-functional where appropriate
wp_sf_nga <- wp_sf %>%
rename(status_clean = '#status_clean') %>%
select(status_clean) %>%
mutate(status_clean = str_replace(status_clean, "Functional, needs repair", "Functional"))
wp_sf_nga <- wp_sf_nga %>%
rename(status_clean = 'status_clean') %>%
select(status_clean) %>%
mutate(status_clean = str_replace(status_clean, "Functional, not in use", "Functional"))
wp_sf_nga <- wp_sf_nga %>%
rename(status_clean = 'status_clean') %>%
select(status_clean) %>%
mutate(status_clean = str_replace(status_clean, "Functional but not in use", "Functional"))
wp_sf_nga <- wp_sf_nga %>%
rename(status_clean = 'status_clean') %>%
select(status_clean) %>%
mutate(status_clean = str_replace(status_clean, "Non-Functional, dry", "Non-Functional"))
wp_sf_nga <- wp_sf_nga %>%
rename(status_clean = 'status_clean') %>%
select(status_clean) %>%
mutate(status_clean = str_replace(status_clean, "Abandoned/Decommissioned", "Non-Functional"))
wp_sf_nga <- wp_sf_nga %>%drop_na('status_clean')#Successful renaming
unique(wp_sf_nga$status_clean)[1] "Functional" "Non-Functional"
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(NGA) +
tm_polygons() +
tm_shape(wp_sf_nga)+
tm_dots(col = "status_clean",
size = 0.01,
border.col = "black",
border.lwd = 0.5) In the code chunk below, st_knn() of sfdep package is used to determine the k (i.e. 6) nearest neighbours for given point geometry.
nb <- include_self(
st_knn(st_geometry(wp_sf_nga), 6))Computing kernel weights : In the code chunk below, st_kernel_weights() of sfdep package is used to derive a weights list by using a kernel function.
wt <- st_kernel_weights(nb,
wp_sf_nga,
"gaussian",
adaptive = TRUE)To compute LCLQ by using sfdep package, the reference point data must be in either character or vector list. The code chunks below are used to prepare two vector lists.One of Functional and for Non-Functional and are called A and B respectively.
Functional <- wp_sf_nga %>%
filter(status_clean == "Functional")
A <- Functional$status_cleanNonFunctional <- wp_sf_nga %>%
filter(status_clean == "Non-Functional")
B <- NonFunctional$status_cleanIn the code chunk below local_colocation() us used to compute the LCLQ values for Functional point event.
LCLQ <- local_colocation(A, B, nb, wt, 39)Before we can plot the LCLQ values their p-values, we need to join the output of local_colocation() to the stores sf data.frame. However, a quick check of LCLQ data-frame, we can’t find any field can be used as the join field. As a result, cbind() of Base R is useed.
LCLQ_wp <- cbind(wp_sf_nga, LCLQ) In the code chunk below, tmap functions are used to plot the LCLQ analysis.
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(NGA) +
tm_polygons() +
tm_shape(LCLQ_wp)+
tm_dots(col = "Non.Functional",
size = 0.01,
border.col = "black",
border.lwd = 0.5) In order to conduct leve 2 spatial analysis we need to conver our data to PPP format as shown below
LCLQ_ppp <- as_Spatial(LCLQ_wp)
LCLQ_ppp <- as(LCLQ_ppp, "SpatialPoints")
LCLQ_ppp <- as(LCLQ_ppp, "ppp")
LCLQ_pppPlanar point pattern: 4997 points
window: rectangle = [177285.9, 290750.96] x [340054.1, 450859.7] units
H0 = The spatial distribution of functional and non-functional water points are independent from each other.}
H1 = The spatial distribution of functional and non-functional water points are [not]{. underline} independent from each other.
The null hypothesis will be rejected if p-value if smaller than alpha value of 0.05. Confidence interval is therefore 95%
The code chunk below is used to perform the hypothesis testing.
plot(envelope(LCLQ_ppp, Lest, nsim = 39, rank = 1, glocal=TRUE))Generating 39 simulations of CSR ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
Done.

Based on the output of CSR as shown above, the curve is above envelope as such we have enough statisctial evidence to reject the null hypothesis at 95% confidence. he spatial distribution of functional and non-functional water points are not independent from each other with 95% confidence